from deep_translator import GoogleTranslator
from datetime import datetime as dt
import matplotlib.pyplot as plt
import matplotlib.font_manager
import pandas as pd
import numpy as np
import folium as f
import openpyxl
import json
%matplotlib inline
output_path = 'output'
geojson_path = 'maps/world/world-administrative-boundaries.geojson'
save_path = f'{output_path}/countries_imports.csv'
def write_csv(df, path):
df.to_csv(path, index=True, header=True)
def open_csv(path):
return pd.read_csv(path)
def open_json(path):
return json.load(open(path))
def get_date():
return str(dt.now().strftime("%Y-%m-%d-%H-%M-%S"))
def save_map(m):
m.save(f"{output_path}\map.html")
def save_plot(plt, direction):
return "Image saved" if plt.savefig(f'{output_path}\\{direction}_barplot-{get_date()}.png', dpi=1200, bbox_inches='tight') else None
# Reading
path = 'raw_data/Perfil_EmpresasImpo_2022_WEB.xlsx'
df = pd.read_excel(path, engine='openpyxl')
# Cleaning
df = pd.DataFrame(df['País_origen'].value_counts())
df.dropna()
# Modificaré las columnas con rename(), así que no coloco los países como índices.
df = df.reset_index()
# Filtering
mask = (df['País_origen'] == 'Sin información') | (df['País_origen'] == 'República Dominicana')
df = df[~mask]
df.rename(columns={"País_origen":"Country", "count":"Total_imports"}, inplace=True)
df
| Country | Total_imports | |
|---|---|---|
| 0 | China | 203794 |
| 1 | Estados Unidos de América | 154279 |
| 2 | España | 25455 |
| 3 | Italia | 17690 |
| 4 | México | 17593 |
| ... | ... | ... |
| 186 | San Cristóbal y Nieves | 1 |
| 187 | Brunéi | 1 |
| 188 | Libia | 1 |
| 189 | Burkina Faso | 1 |
| 190 | Islas Åland | 1 |
190 rows × 2 columns
# Pruebas con los nombres de algunos países
# dictionary={'ä':'a','ö':'o','Ä':'A','å':'a', 'Å': 'a', 'ü': 'u'}
# df['Country'] = df['Country'].replace(dictionary, regex=True, inplace=True)
# Country column translation in order to match geojson countries for displaying the data on the map.
df['Country'] = df['Country'].apply(lambda x: str(GoogleTranslator(source='es', target='en').translate(x)))
df['Country'] = df['Country'].replace(['The Savior', 'Türkiye'], ['El Salvador', 'Turkey'])
df['Country'] = df['Country'].apply(lambda x: str(x.split('(')[0].strip()))
df
| Country | Total_imports | |
|---|---|---|
| 0 | China | 203794 |
| 1 | United States of America | 154279 |
| 2 | Spain | 25455 |
| 3 | Italy | 17690 |
| 4 | Mexico | 17593 |
| ... | ... | ... |
| 186 | Saint Kitts and Nevis | 1 |
| 187 | Brunei | 1 |
| 188 | Libya | 1 |
| 189 | Burkina Faso | 1 |
| 190 | Aland Islands | 1 |
190 rows × 2 columns
# Setting Country as the DataFrame index.
df.set_index('Country', inplace=True)
df
| Total_imports | |
|---|---|
| Country | |
| China | 203794 |
| United States of America | 154279 |
| Spain | 25455 |
| Italy | 17690 |
| Mexico | 17593 |
| ... | ... |
| Saint Kitts and Nevis | 1 |
| Brunei | 1 |
| Libya | 1 |
| Burkina Faso | 1 |
| Aland Islands | 1 |
190 rows × 1 columns
# Saving processed data
write_csv(df, save_path)
# Ahora trabajaré con la data procesada.
df = open_csv(save_path)
df.set_index('Country', inplace=True)
# Organizamos la data de manera descendente.
df = df.sort_values(by='Total_imports', ascending=False)
df
| Total_imports | |
|---|---|
| Country | |
| China | 203794 |
| United States of America | 154279 |
| Spain | 25455 |
| Italy | 17690 |
| Mexico | 17593 |
| ... | ... |
| Tuvalu | 1 |
| Mali | 1 |
| South Georgia and the South Sandwich Islands | 1 |
| Montserrat | 1 |
| Aland Islands | 1 |
190 rows × 1 columns
# Horizontal Bars Plot
fig, ax = plt.subplots()
bp = df[:10] # Top 10
# Save the chart so we can loop through the bars below.
bars = ax.bar(
x=np.arange(bp.size),
height=bp['Total_imports'],
tick_label=bp.index
)
# Axis formatting.
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['bottom'].set_color('#DDDDDD')
ax.tick_params(bottom=False, left=False)
ax.set_axisbelow(True)
ax.yaxis.grid(True, color='#EEEEEE')
ax.xaxis.grid(False)
# Add text annotations to the top of the bars.
bar_color = bars[0].get_facecolor()
for bar in bars:
ax.text(
bar.get_x() + bar.get_width() / 2,
bar.get_height() + 0.3,
round(bar.get_height(), 1),
horizontalalignment='center',
verticalalignment='bottom',
color=bar_color,
weight='bold'
)
# Add labels and a title.
ax.set_xlabel('País de origen (p)', labelpad=15, color='#333333')
ax.set_ylabel('Cantidad de importaciones (n)', labelpad=15, color='#333333')
ax.set_title('Importaciones de mercancía [2022]', pad=15, color='#333333',
weight='bold')
ax.margins(0.01, None)
fig.autofmt_xdate()
fig.tight_layout()
save_plot(fig, 'vertical')
# Ordenamos de manera ascendente para graficar de abajo hacia arriba.
bp = bp.sort_values(by='Total_imports', ascending=True)
# Spliting data for x and y.
names = bp.index
values = bp['Total_imports']
lim = bp['Total_imports'][-1] * 1.1
# Setting font
plt.rcParams['font.family'] = 'DejaVu Sans'
plt.rcParams['font.sans-serif'] = 'DejaVu Sans'
# Set the style of the axes and the text color
plt.rcParams['axes.edgecolor']='#333F4B'
plt.rcParams['axes.linewidth']=0.8
plt.rcParams['xtick.color']='#333F4B'
plt.rcParams['ytick.color']='#333F4B'
plt.rcParams['text.color']='#333F4B'
# Numeric placeholder for the y axis
my_range=list(range(1, len(bp.index)+1))
fig, ax = plt.subplots(figsize=(5,3.5))
# Create for each expense type an horizontal line that starts at x = 0 with the length
# represented by the specific expense percentage value.
plt.hlines(y=my_range, xmin=0, xmax=bp['Total_imports'], color='#007ACC', alpha=0.2, linewidth=5)
# create for each expense type a dot at the level of the expense percentage value
plt.plot(bp['Total_imports'], my_range, "o", markersize=5, color='#007ACC', alpha=0.6)
# set labels
ax.set_xlabel('Importaciones', fontsize=15, fontweight='black', color = '#333F4B')
ax.set_ylabel('')
# set axis
ax.tick_params(axis='both', which='major', labelsize=12)
plt.yticks(my_range, bp.index)
# add an horizonal label for the y axis
fig.text(-0.23, 0.96, 'País de Origen', fontsize=15, fontweight='black', color = '#333F4B')
# change the style of the axis spines
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['left'].set_bounds((1, len(my_range)))
ax.set_xlim(0, lim)
ax.spines['left'].set_position(('outward', 8))
ax.spines['bottom'].set_position(('outward', 5))
save_plot(plt, 'horizontal')
# Displaying the data on the map.
center = [35.762887,84.083132]
mapa = f.Map(location=center, zoom_start=2,
min_zoom=1, max_bounds=True,
min_lat=-84, min_lon=-175,
max_lat=84, max_lon=187,
control_scale=True,
tiles="cartodb positron"
)
file_json = geojson_path
data_geojson = open_json(file_json)
f.Choropleth(geo_data=data_geojson,
name = 'Importaciones',
data=df,
columns=(df.index, 'Total_imports'),
key_on="properties.name",
fill_color="Pastel2",
fill_opacity=.7,
line_opacity=.1,
nan_fill_color='gray',
line_color = "#0000",
show=True,
overlay=True,
nan_fill_opacity=0.1,
legend_name='Países que exportaron mercancía hacia República Dominicana (2022)',
highlight= True,
reset=True
).add_to(mapa)
f.LayerControl().add_to(mapa)
mapa
save_map(mapa)
print("Total de paises que importaron a República Dominicana en 2022:", len(df.index))
Total de paises que importaron a República Dominicana en 2022: 190